library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
library(pheatmap)
library(stats)
library(ggfortify)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(grid)
library(ComplexHeatmap)
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ rstatix::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(corrplot)
## corrplot 0.95 loaded
This script explores how the markers Galectin-4, HMGA2 and GATA6 are enriched in tumor cells in different tissue locations in an injection model of murine PDAC.
Transparancy note: An issue discovered after the previous script for this injection model was made public in GitHub, used for teh preprint, is that cells that were completely negative for quantified marker (at that time, only HMGA2) were not included in the wilcoxon testing also displayed in the boxplot. This is adjusted in the current script.
We aimed here to rather use the markers as a panel that can be used to call an overall subtype in the ROIs, than single markers on their own.
setwd("/Users/sara.soderqvist/Library/CloudStorage/OneDrive-KarolinskaInstitutet/Writing_/Adaptive PDAC paper/Code/source data files to read in/In vivo") # adjust the working directory
HMGA2.data_ori <- read.table('read in_in vivo_injection model.tsv',
sep = '\t', header = T)
dim(HMGA2.data_ori) #27 860 observations (=Cells) in total
[1] 27860 12
knitr::kable(head(HMGA2.data_ori))
| Image | Object.ID | Object.type | Name | Classification | Parent | ROI | Nucleus..DAPI.mean | Nucleus..HMGA2.mean | Cell..Galectin.4.mean | Cell..Vimentin.mean | Nucleus..GATA6.mean |
|---|---|---|---|---|---|---|---|---|---|---|---|
| HA_29_StromaA | 0685f1e5-48ce-4260-8ead-4a23407ce0df | Cell | NA | HMGA2: Negative | Annotation (stromal_invasion_1) | Polygon | 732.5444 | 144.6555 | NA | NA | NA |
| HA_29_StromaA | 282b7983-eece-4646-9b65-78b9ea6124c5 | Cell | NA | HMGA2: Negative | Annotation (stromal_invasion_1) | Polygon | 1394.4332 | 144.7038 | NA | NA | NA |
| HA_29_StromaA | 401246d7-183a-4292-a28b-4f81683ad4e4 | Cell | NA | HMGA2: Negative | Annotation (stromal_invasion_1) | Polygon | 1388.3823 | 146.9265 | NA | NA | NA |
| HA_29_StromaA | fb07d294-de23-48ff-8803-5ee37f151435 | Cell | NA | HMGA2: Negative | Annotation (stromal_invasion_1) | Polygon | 1178.6666 | 148.4310 | NA | NA | NA |
| HA_29_StromaA | f960b0a1-162c-4600-98dc-0d1555ca7341 | Cell | NA | HMGA2: Negative | Annotation (stromal_invasion_1) | Polygon | 1526.1434 | 149.2826 | NA | NA | NA |
| HA_29_StromaA | f88d64be-d5cf-4eeb-adf0-ea586b398ef9 | Cell | NA | HMGA2: Positive | Annotation (stromal_invasion_1) | Polygon | 1604.3833 | 1007.2432 | NA | NA | NA |
# Subset the data to only relevant information
og.data <- HMGA2.data_ori %>%
select(Image, Classification, Parent)%>%
data.frame()
HMGA2.posdata <- og.data[og.data$Classification =="HMGA2: Positive", ]
HMGA2.negdata <- og.data[og.data$Classification =="HMGA2: Negative", ]
HMGA2.data <- rbind(HMGA2.posdata, HMGA2.negdata) # these are now all cells wither positive or negative for HMGA2, n = 11 233 cells.
colnames(HMGA2.data) <- c('Image', 'HMGA2_status', 'Parent')
HMGA2.data$Image_id <- gsub("\\..*","",HMGA2.data$Image)
HMGA2.data$Mouse_id <- gsub("HA_","",HMGA2.data$Image)
HMGA2.data$Mouse_id <- gsub("H_","",HMGA2.data$Mouse_id) # addition after name changing..
HMGA2.data$Mouse_id <- gsub("_.*","",HMGA2.data$Mouse_id)
unique(HMGA2.data$Mouse_id) # only case number left
[1] “29” “28” “27” “30”
HMGA2.data$HMGA2_status <- gsub("HMGA2: ","",HMGA2.data$HMGA2_status)
unique(HMGA2.data$HMGA2_status) # only status information left
[1] “Positive” “Negative”
#create a new column to have plain info on the ROI (stroma/acinar aka lobular)
HMGA2.data$invasion_type <- gsub("_i.*","",HMGA2.data$Parent)
HMGA2.data$invasion_type <- gsub("Annotation \\(","",HMGA2.data$invasion_type)
unique(HMGA2.data$invasion_type)
[1] “stromal” “acinar”
# convertion & new columns format
knitr::kable(head(HMGA2.data))
| Image | HMGA2_status | Parent | Image_id | Mouse_id | invasion_type | |
|---|---|---|---|---|---|---|
| 6 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
| 8 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
| 9 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
| 10 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
| 11 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
| 12 | HA_29_StromaA | Positive | Annotation (stromal_invasion_1) | HA_29_StromaA | 29 | stromal |
HMGA2_summary <- HMGA2.data %>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
HMGA2_pos <- HMGA2.data %>%
filter(HMGA2_status == 'Positive')%>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(HMGA2pos = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
HMGA2_Rsummary <- merge(x = HMGA2_summary, y = HMGA2_pos, id = c('Mouse_id', 'Parent'), all = TRUE)
# if NA occurs here in the HMGA2pos column, it means that that ROIs had 0 positive tumor cells there, add that:
HMGA2_Rsummary[is.na(HMGA2_Rsummary)] <- 0
HMGA2_Rsummary <- HMGA2_Rsummary%>%
mutate(HMGApos_percent = HMGA2pos/Detections*100)%>%
data.frame()
knitr::kable(HMGA2_Rsummary)
| Image_id | Mouse_id | invasion_type | Parent | Detections | HMGA2pos | HMGApos_percent |
|---|---|---|---|---|---|---|
| H_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_1) | 225 | 83 | 36.8888889 |
| H_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_3) | 251 | 30 | 11.9521912 |
| H_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_1) | 586 | 188 | 32.0819113 |
| H_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_2) | 613 | 288 | 46.9820555 |
| H_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_3) | 155 | 112 | 72.2580645 |
| H_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_4) | 400 | 188 | 47.0000000 |
| HA_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_1) | 61 | 42 | 68.8524590 |
| HA_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_2) | 110 | 73 | 66.3636364 |
| HA_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_3) | 35 | 0 | 0.0000000 |
| HA_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_4) | 299 | 8 | 2.6755853 |
| HA_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_5) | 198 | 1 | 0.5050505 |
| HA_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_1) | 262 | 220 | 83.9694656 |
| HA_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_2) | 321 | 244 | 76.0124611 |
| HA_28_LobuleA | 28 | acinar | Annotation (acinar_invasion_1) | 222 | 0 | 0.0000000 |
| HA_28_LobuleA | 28 | stromal | Annotation (stromal_invasion_1) | 152 | 1 | 0.6578947 |
| HA_28_LobuleB | 28 | stromal | Annotation (stromal_invasion_1_A) | 193 | 0 | 0.0000000 |
| HA_28_LobuleB | 28 | stromal | Annotation (stromal_invasion_2_A) | 161 | 0 | 0.0000000 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_1) | 230 | 6 | 2.6086957 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_10) | 19 | 2 | 10.5263158 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_11) | 168 | 1 | 0.5952381 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_12) | 25 | 2 | 8.0000000 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_13) | 56 | 1 | 1.7857143 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_14) | 113 | 1 | 0.8849558 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_15) | 86 | 9 | 10.4651163 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_16) | 98 | 0 | 0.0000000 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_17) | 211 | 10 | 4.7393365 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_18) | 67 | 1 | 1.4925373 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_2) | 72 | 15 | 20.8333333 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_3) | 61 | 1 | 1.6393443 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_4) | 331 | 7 | 2.1148036 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_5) | 103 | 34 | 33.0097087 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_6) | 68 | 10 | 14.7058824 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_7) | 32 | 2 | 6.2500000 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_8) | 48 | 13 | 27.0833333 |
| HA_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_9) | 88 | 4 | 4.5454545 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_1) | 42 | 14 | 33.3333333 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_2) | 152 | 0 | 0.0000000 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_3) | 352 | 8 | 2.2727273 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_4) | 184 | 13 | 7.0652174 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_6) | 458 | 68 | 14.8471616 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_7) | 571 | 35 | 6.1295972 |
| HA_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_8) | 418 | 37 | 8.8516746 |
| HA_28_LobuleD | 28 | acinar | Annotation (acinar_invasion_1) | 183 | 88 | 48.0874317 |
| HA_29_StromaA | 29 | stromal | Annotation (stromal_invasion_1) | 421 | 339 | 80.5225653 |
| HA_29_StromaA | 29 | stromal | Annotation (stromal_invasion_2) | 1742 | 699 | 40.1262916 |
| HA_29_StromaA | 29 | stromal | Annotation (stromal_invasion_3) | 590 | 431 | 73.0508475 |
ROIs <- HMGA2_Rsummary %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(ROIs = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
HMGA2_Msummary <- HMGA2.data %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
HMGA2_pos <- HMGA2.data %>%
filter(HMGA2_status == 'Positive')%>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(HMGA2pos = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
HMGA2_Msummary <- merge(x = HMGA2_Msummary, y = HMGA2_pos, id = c('Mouse_id'))
HMGA2_Msummary <- merge(x = HMGA2_Msummary, y = ROIs, id = c('Mouse_id', 'invasion_type'))
HMGA2_Msummary <- HMGA2_Msummary%>%
mutate(HMGApos_percent = HMGA2pos/Detections*100)%>%
data.frame()
knitr::kable(HMGA2_Msummary)
| Mouse_id | invasion_type | Detections | HMGA2pos | ROIs | HMGApos_percent |
|---|---|---|---|---|---|
| 27 | acinar | 703 | 124 | 5 | 17.638691 |
| 27 | stromal | 583 | 464 | 2 | 79.588336 |
| 28 | acinar | 2281 | 207 | 20 | 9.074967 |
| 28 | stromal | 2683 | 176 | 10 | 6.559821 |
| 29 | stromal | 2753 | 1469 | 3 | 53.359971 |
| 30 | acinar | 476 | 113 | 2 | 23.739496 |
| 30 | stromal | 1754 | 776 | 4 | 44.241733 |
# Wilcox test (non-paired)
res.wil <- HMGA2_Rsummary %>%
wilcox_test(HMGApos_percent~invasion_type)%>%
adjust_pvalue(method = 'BH')%>%
add_significance()
res.wil <- res.wil %>% add_xy_position(x = 'invasion_type')
res.wil
## # A tibble: 1 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 HMGApos_percent acinar strom… 27 19 176. 0.0758 0.0758 ns
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## # xmax <dbl>
# plot
box_HMGA2 <- HMGA2_Rsummary%>%
ggplot(aes(x = invasion_type, y = HMGApos_percent))+
geom_boxplot(width = 0.32, outlier.shape = NA)+
geom_jitter(aes(color = Mouse_id), alpha = 1, position=position_jitter(0.15), size = 2.8, seed = 1.5)+
scale_color_brewer(palette = 'Dark2')+
labs(x = '', y = 'HMGA2 positive cells (%)')+
theme_bw()+
theme(legend.position = "right", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
axis.ticks = element_blank())+
stat_pvalue_manual(res.wil, label = 'p.adj.signif',
y.position = 90)+
ylim(0,100)
box_HMGA2
hmga2_split <- split(HMGA2_Rsummary, f = HMGA2_Rsummary$Mouse_id)
lapply(hmga2_split, function (a) {
b <- split(a, f = a$Image_id)
lapply(b, function(c) {
unique(c$Parent)
})
}
)
## $`27`
## $`27`$HA_27_LobuleA
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_2)"
## [3] "Annotation (acinar_invasion_3)" "Annotation (acinar_invasion_4)"
## [5] "Annotation (acinar_invasion_5)" "Annotation (stromal_invasion_1)"
## [7] "Annotation (stromal_invasion_2)"
##
##
## $`28`
## $`28`$HA_28_LobuleA
## [1] "Annotation (acinar_invasion_1)" "Annotation (stromal_invasion_1)"
##
## $`28`$HA_28_LobuleB
## [1] "Annotation (stromal_invasion_1_A)" "Annotation (stromal_invasion_2_A)"
##
## $`28`$HA_28_LobuleC
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_10)"
## [3] "Annotation (acinar_invasion_11)" "Annotation (acinar_invasion_12)"
## [5] "Annotation (acinar_invasion_13)" "Annotation (acinar_invasion_14)"
## [7] "Annotation (acinar_invasion_15)" "Annotation (acinar_invasion_16)"
## [9] "Annotation (acinar_invasion_17)" "Annotation (acinar_invasion_18)"
## [11] "Annotation (acinar_invasion_2)" "Annotation (acinar_invasion_3)"
## [13] "Annotation (acinar_invasion_4)" "Annotation (acinar_invasion_5)"
## [15] "Annotation (acinar_invasion_6)" "Annotation (acinar_invasion_7)"
## [17] "Annotation (acinar_invasion_8)" "Annotation (acinar_invasion_9)"
## [19] "Annotation (stromal_invasion_1)" "Annotation (stromal_invasion_2)"
## [21] "Annotation (stromal_invasion_3)" "Annotation (stromal_invasion_4)"
## [23] "Annotation (stromal_invasion_6)" "Annotation (stromal_invasion_7)"
## [25] "Annotation (stromal_invasion_8)"
##
## $`28`$HA_28_LobuleD
## [1] "Annotation (acinar_invasion_1)"
##
##
## $`29`
## $`29`$HA_29_StromaA
## [1] "Annotation (stromal_invasion_1)" "Annotation (stromal_invasion_2)"
## [3] "Annotation (stromal_invasion_3)"
##
##
## $`30`
## $`30`$H_30_LobuleA
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_3)"
## [3] "Annotation (stromal_invasion_1)" "Annotation (stromal_invasion_2)"
## [5] "Annotation (stromal_invasion_3)" "Annotation (stromal_invasion_4)"
Compared to the HMGA2 quantification, two ROIs could not be identified. Level differences in the sections rendered no tumor in these two regions.
# Subset the data to only relevant information
Gal4.posdata <- og.data[og.data$Classification =="Gal4: Positive", ]
Gal4.negdata <- og.data[og.data$Classification =="Gal4: Negative", ]
Gal4.data <- rbind(Gal4.posdata, Gal4.negdata) # these are now all cells that are either Gal4+ or Gal4-, n = 8 485 cells.
dim(Gal4.data)
[1] 8485 3
colnames(Gal4.data) <- c('Image', 'Gal4_status', 'Parent')
Gal4.data$Image_id <- gsub("\\..*","",Gal4.data$Image)
Gal4.data$Mouse_id <- gsub("GV_","",Gal4.data$Image)
Gal4.data$Mouse_id <- gsub("_.*","",Gal4.data$Mouse_id)
unique(Gal4.data$Mouse_id) # only case number left
[1] “27” “30” “28” “29”
Gal4.data$Gal4_status <- gsub("Gal4: ","",Gal4.data$Gal4_status)
unique(Gal4.data$Gal4_status) # only status information left
[1] “Positive” “Negative”
#create a new column to have plain info on the ROI (stroma/acinar)
Gal4.data$invasion_type <- gsub("_i.*","",Gal4.data$Parent)
Gal4.data$invasion_type <- gsub("Annotation \\(","",Gal4.data$invasion_type)
unique(Gal4.data$invasion_type)
[1] “acinar” “stromal”
# convertion & new columns look as they should
knitr::kable(head(Gal4.data))
| Image | Gal4_status | Parent | Image_id | Mouse_id | invasion_type | |
|---|---|---|---|---|---|---|
| 11237 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
| 11242 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
| 11243 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
| 11245 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
| 11264 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
| 11268 | GV_27_LobuleA | Positive | Annotation (acinar_invasion_1) | GV_27_LobuleA | 27 | acinar |
Gal4_summary <- Gal4.data %>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
Gal4_pos <- Gal4.data %>%
filter(Gal4_status == 'Positive')%>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(Gal4pos = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
Gal4_Rsummary <- merge(x = Gal4_summary, y = Gal4_pos, id = c('Mouse_id', 'Parent'), all = TRUE)
# if NA occurs here in the Gal4pos column, it means that that ROIs had 0 positive tumor cells there, add that:
Gal4_Rsummary[is.na(Gal4_Rsummary)] <- 0
Gal4_Rsummary <- Gal4_Rsummary%>%
mutate(Gal4pos_percent = Gal4pos/Detections*100)%>%
data.frame()
knitr::kable(Gal4_Rsummary)
| Image_id | Mouse_id | invasion_type | Parent | Detections | Gal4pos | Gal4pos_percent |
|---|---|---|---|---|---|---|
| GV_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_1) | 65 | 17 | 26.1538462 |
| GV_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_2) | 103 | 1 | 0.9708738 |
| GV_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_3) | 28 | 1 | 3.5714286 |
| GV_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_4) | 103 | 3 | 2.9126214 |
| GV_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_5) | 99 | 0 | 0.0000000 |
| GV_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_1) | 259 | 25 | 9.6525097 |
| GV_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_2) | 338 | 41 | 12.1301775 |
| GV_28_LobuleA | 28 | acinar | Annotation (acinar_invasion_1) | 108 | 60 | 55.5555556 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_1) | 374 | 165 | 44.1176471 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_10) | 29 | 26 | 89.6551724 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_11) | 202 | 174 | 86.1386139 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_12) | 35 | 33 | 94.2857143 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_13) | 31 | 6 | 19.3548387 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_14) | 35 | 28 | 80.0000000 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_15) | 132 | 33 | 25.0000000 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_16) | 103 | 82 | 79.6116505 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_17) | 182 | 161 | 88.4615385 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_18) | 94 | 60 | 63.8297872 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_2) | 79 | 69 | 87.3417722 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_3) | 61 | 52 | 85.2459016 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_4) | 392 | 239 | 60.9693878 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_5) | 139 | 132 | 94.9640288 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_6) | 91 | 83 | 91.2087912 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_7) | 30 | 30 | 100.0000000 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_8) | 118 | 109 | 92.3728814 |
| GV_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_9) | 146 | 49 | 33.5616438 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_1_A) | 265 | 248 | 93.5849057 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_1) | 47 | 13 | 27.6595745 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_2_A) | 246 | 176 | 71.5447154 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_2) | 85 | 58 | 68.2352941 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_3) | 160 | 47 | 29.3750000 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_4) | 225 | 4 | 1.7777778 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_6) | 278 | 234 | 84.1726619 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_7) | 126 | 114 | 90.4761905 |
| GV_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_8) | 335 | 287 | 85.6716418 |
| GV_29_StromaA | 29 | stromal | Annotation (stromal_invasion_1) | 393 | 64 | 16.2849873 |
| GV_29_StromaA | 29 | stromal | Annotation (stromal_invasion_2) | 1666 | 293 | 17.5870348 |
| GV_29_StromaA | 29 | stromal | Annotation (stromal_invasion_3) | 513 | 101 | 19.6881092 |
| GV_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_1) | 128 | 5 | 3.9062500 |
| GV_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_3) | 82 | 8 | 9.7560976 |
| GV_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_1) | 146 | 38 | 26.0273973 |
| GV_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_2) | 187 | 10 | 5.3475936 |
| GV_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_3) | 100 | 2 | 2.0000000 |
| GV_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_4) | 127 | 103 | 81.1023622 |
ROIs <- Gal4_Rsummary %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(ROIs = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
Gal4_Msummary <- Gal4.data %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
Gal4_pos <- Gal4.data %>%
filter(Gal4_status == 'Positive')%>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(Gal4pos = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
Gal4_Msummary <- merge(x = Gal4_Msummary, y = Gal4_pos, id = c('Mouse_id'))
Gal4_Msummary <- merge(x = Gal4_Msummary, y = ROIs, id = c('Mouse_id', 'invasion_type'))
Gal4_Msummary <- Gal4_Msummary%>%
mutate(Gal4pos_percent = Gal4pos/Detections*100)%>%
data.frame()
knitr::kable(Gal4_Msummary)
| Mouse_id | invasion_type | Detections | Gal4pos | ROIs | Gal4pos_percent |
|---|---|---|---|---|---|
| 27 | acinar | 398 | 22 | 5 | 5.527638 |
| 27 | stromal | 597 | 66 | 2 | 11.055276 |
| 28 | acinar | 2381 | 1591 | 19 | 66.820664 |
| 28 | stromal | 1767 | 1181 | 9 | 66.836446 |
| 29 | stromal | 2572 | 458 | 3 | 17.807154 |
| 30 | acinar | 210 | 13 | 2 | 6.190476 |
| 30 | stromal | 560 | 153 | 4 | 27.321429 |
# Wilcox test (non-paired)
res.wilG <- Gal4_Rsummary%>%
wilcox_test(Gal4pos_percent~invasion_type)%>%
adjust_pvalue(method = 'BH')%>%
add_significance()
res.wilG <- res.wilG %>% add_xy_position(x = 'invasion_type')
res.wilG
## # A tibble: 1 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Gal4p… acinar strom… 26 18 282 0.26 0.26 ns 101.
## # ℹ 3 more variables: groups <named list>, xmin <dbl>, xmax <dbl>
# plot
box_Gal4 <- Gal4_Rsummary%>%
ggplot(aes(x = invasion_type, y = Gal4pos_percent))+
geom_boxplot(width = 0.32, outlier.shape = NA)+
geom_jitter(aes(color = Mouse_id), alpha = 1, position=position_jitter(0.15), size = 2.8, seed = 1.5)+
scale_color_brewer(palette = 'Dark2')+
labs(x = '', y = 'Gal4 positive cells (%)')+
theme_bw()+
theme(legend.position = "right", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
axis.ticks = element_blank())+
stat_pvalue_manual(res.wilG, label = 'p.adj.signif',
y.position = 90)+
ylim(0,100)
box_Gal4
gal4_split <- split(Gal4_Rsummary, f = Gal4_Rsummary$Mouse_id)
lapply(gal4_split, function (a) {
b <- split(a, f = a$Image_id)
lapply(b, function(c) {
unique(c$Parent)
})
}
)
## $`27`
## $`27`$GV_27_LobuleA
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_2)"
## [3] "Annotation (acinar_invasion_3)" "Annotation (acinar_invasion_4)"
## [5] "Annotation (acinar_invasion_5)" "Annotation (stromal_invasion_1)"
## [7] "Annotation (stromal_invasion_2)"
##
##
## $`28`
## $`28`$GV_28_LobuleA
## [1] "Annotation (acinar_invasion_1)"
##
## $`28`$GV_28_LobuleC
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_10)"
## [3] "Annotation (acinar_invasion_11)" "Annotation (acinar_invasion_12)"
## [5] "Annotation (acinar_invasion_13)" "Annotation (acinar_invasion_14)"
## [7] "Annotation (acinar_invasion_15)" "Annotation (acinar_invasion_16)"
## [9] "Annotation (acinar_invasion_17)" "Annotation (acinar_invasion_18)"
## [11] "Annotation (acinar_invasion_2)" "Annotation (acinar_invasion_3)"
## [13] "Annotation (acinar_invasion_4)" "Annotation (acinar_invasion_5)"
## [15] "Annotation (acinar_invasion_6)" "Annotation (acinar_invasion_7)"
## [17] "Annotation (acinar_invasion_8)" "Annotation (acinar_invasion_9)"
## [19] "Annotation (stromal_invasion_1_A)" "Annotation (stromal_invasion_1)"
## [21] "Annotation (stromal_invasion_2_A)" "Annotation (stromal_invasion_2)"
## [23] "Annotation (stromal_invasion_3)" "Annotation (stromal_invasion_4)"
## [25] "Annotation (stromal_invasion_6)" "Annotation (stromal_invasion_7)"
## [27] "Annotation (stromal_invasion_8)"
##
##
## $`29`
## $`29`$GV_29_StromaA
## [1] "Annotation (stromal_invasion_1)" "Annotation (stromal_invasion_2)"
## [3] "Annotation (stromal_invasion_3)"
##
##
## $`30`
## $`30`$GV_30_LobuleA
## [1] "Annotation (acinar_invasion_1)" "Annotation (acinar_invasion_3)"
## [3] "Annotation (stromal_invasion_1)" "Annotation (stromal_invasion_2)"
## [5] "Annotation (stromal_invasion_3)" "Annotation (stromal_invasion_4)"
Compared to the HMGA2 quantification, two ROIs could not be identified. Level differences in the sections rendered no tumor in these two regions, which is the same situation as the Gal4 quantification above.
# Subset the data to only relevant information
GATA6.posdata <- og.data[og.data$Classification =="GATA6: Positive", ]
GATA6.negdata <- og.data[og.data$Classification =="GATA6: Negative", ]
GATA6.data <- rbind(GATA6.posdata, GATA6.negdata) # these are now all cells that are either GATA6+ or GATA6- cells, n = 8 141 cells.
dim(GATA6.data)
[1] 8141 3
colnames(GATA6.data) <- c('Image', 'GATA6_status', 'Parent')
GATA6.data$Image_id <- gsub("\\..*","",GATA6.data$Image)
GATA6.data$Mouse_id <- gsub("HG_","",GATA6.data$Image)
GATA6.data$Mouse_id <- gsub("_.*","",GATA6.data$Mouse_id)
unique(GATA6.data$Mouse_id) # only case number left
[1] “30” “28” “27” “29”
GATA6.data$GATA6_status <- gsub("GATA6: ","",GATA6.data$GATA6_status)
unique(GATA6.data$GATA6_status) # only status information left
[1] “Positive” “Negative”
#create a new column to have plain info on the ROI (stroma/acinar)
GATA6.data$invasion_type <- gsub("_i.*","",GATA6.data$Parent)
GATA6.data$invasion_type <- gsub("Annotation \\(","",GATA6.data$invasion_type)
unique(GATA6.data$invasion_type)
[1] “acinar” “stromal”
# convertion & new columns look as they should
knitr::kable(head(GATA6.data))
| Image | GATA6_status | Parent | Image_id | Mouse_id | invasion_type | |
|---|---|---|---|---|---|---|
| 12999 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
| 13000 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
| 13001 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
| 13002 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
| 13003 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
| 13004 | HG_30_LobuleA | Positive | Annotation (acinar_invasion_1) | HG_30_LobuleA | 30 | acinar |
GATA6_summary <- GATA6.data %>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
GATA6_pos <- GATA6.data %>%
filter(GATA6_status == 'Positive')%>%
select(Image_id, Mouse_id, invasion_type, Parent)%>%
group_by(Image_id, Mouse_id, invasion_type, Parent)%>%
dplyr::summarise(GATA6pos = n())
## `summarise()` has grouped output by 'Image_id', 'Mouse_id', 'invasion_type'.
## You can override using the `.groups` argument.
GATA6_Rsummary <- merge(x = GATA6_summary, y = GATA6_pos, id = c('Mouse_id', 'Parent'), all = TRUE)
GATA6_Rsummary[is.na(GATA6_Rsummary)] <- 0
GATA6_Rsummary <- GATA6_Rsummary%>%
mutate(GATA6pos_percent = GATA6pos/Detections*100)%>%
data.frame()
knitr::kable(GATA6_Rsummary)
| Image_id | Mouse_id | invasion_type | Parent | Detections | GATA6pos | GATA6pos_percent |
|---|---|---|---|---|---|---|
| HG_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_1) | 73 | 55 | 75.34247 |
| HG_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_2) | 88 | 43 | 48.86364 |
| HG_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_3) | 24 | 14 | 58.33333 |
| HG_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_4) | 64 | 22 | 34.37500 |
| HG_27_LobuleA | 27 | acinar | Annotation (acinar_invasion_5) | 74 | 19 | 25.67568 |
| HG_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_1) | 234 | 178 | 76.06838 |
| HG_27_LobuleA | 27 | stromal | Annotation (stromal_invasion_2) | 342 | 264 | 77.19298 |
| HG_28_LobuleA | 28 | acinar | Annotation (acinar_invasion_1) | 86 | 29 | 33.72093 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_1) | 248 | 70 | 28.22581 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_10) | 37 | 31 | 83.78378 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_11) | 173 | 91 | 52.60116 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_12) | 30 | 26 | 86.66667 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_13) | 40 | 7 | 17.50000 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_14) | 66 | 47 | 71.21212 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_15) | 97 | 52 | 53.60825 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_16) | 104 | 89 | 85.57692 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_17) | 178 | 129 | 72.47191 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_18) | 95 | 73 | 76.84211 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_2) | 50 | 41 | 82.00000 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_3) | 82 | 55 | 67.07317 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_4) | 411 | 179 | 43.55231 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_5) | 121 | 93 | 76.85950 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_6) | 98 | 87 | 88.77551 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_7) | 46 | 42 | 91.30435 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_8) | 121 | 103 | 85.12397 |
| HG_28_LobuleC | 28 | acinar | Annotation (acinar_invasion_9) | 167 | 128 | 76.64671 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_1_A) | 277 | 212 | 76.53430 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_1) | 48 | 19 | 39.58333 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_2_A) | 249 | 173 | 69.47791 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_2) | 93 | 26 | 27.95699 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_3) | 169 | 72 | 42.60355 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_4) | 205 | 180 | 87.80488 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_6) | 292 | 212 | 72.60274 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_7) | 145 | 137 | 94.48276 |
| HG_28_LobuleC | 28 | stromal | Annotation (stromal_invasion_8) | 320 | 228 | 71.25000 |
| HG_29_StromaA | 29 | stromal | Annotation (stromal_invasion_1) | 463 | 402 | 86.82505 |
| HG_29_StromaA | 29 | stromal | Annotation (stromal_invasion_2) | 1298 | 914 | 70.41602 |
| HG_29_StromaA | 29 | stromal | Annotation (stromal_invasion_3) | 522 | 466 | 89.27203 |
| HG_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_1) | 145 | 126 | 86.89655 |
| HG_30_LobuleA | 30 | acinar | Annotation (acinar_invasion_3) | 95 | 68 | 71.57895 |
| HG_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_1) | 236 | 184 | 77.96610 |
| HG_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_2) | 191 | 178 | 93.19372 |
| HG_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_3) | 110 | 98 | 89.09091 |
| HG_30_LobuleA | 30 | stromal | Annotation (stromal_invasion_4) | 134 | 106 | 79.10448 |
ROIs <- GATA6_Rsummary %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(ROIs = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
GATA6_Msummary <- GATA6.data %>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(Detections = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
GATA6_pos <- GATA6.data %>%
filter(GATA6_status == 'Positive')%>%
select(Mouse_id, invasion_type)%>%
group_by(Mouse_id, invasion_type)%>%
dplyr::summarise(GATA6pos = n())
## `summarise()` has grouped output by 'Mouse_id'. You can override using the
## `.groups` argument.
GATA6_Msummary <- merge(x = GATA6_Msummary, y = GATA6_pos, id = c('Mouse_id'))
GATA6_Msummary <- merge(x = GATA6_Msummary, y = ROIs, id = c('Mouse_id', 'invasion_type'))
GATA6_Msummary <- GATA6_Msummary%>%
mutate(GATA6pos_percent = GATA6pos/Detections*100)%>%
data.frame()
knitr::kable(GATA6_Msummary)
| Mouse_id | invasion_type | Detections | GATA6pos | ROIs | GATA6pos_percent |
|---|---|---|---|---|---|
| 27 | acinar | 323 | 153 | 5 | 47.36842 |
| 27 | stromal | 576 | 442 | 2 | 76.73611 |
| 28 | acinar | 2250 | 1372 | 19 | 60.97778 |
| 28 | stromal | 1798 | 1259 | 9 | 70.02225 |
| 29 | stromal | 2283 | 1782 | 3 | 78.05519 |
| 30 | acinar | 240 | 194 | 2 | 80.83333 |
| 30 | stromal | 671 | 566 | 4 | 84.35171 |
# Wilcox test (non-paired)
res.wilGATA6 <- GATA6_Rsummary%>%
wilcox_test(GATA6pos_percent~invasion_type)%>%
adjust_pvalue(method = 'BH')%>%
add_significance()
res.wilGATA6 <- res.wilGATA6 %>% add_xy_position(x = 'invasion_type')
res.wilGATA6
## # A tibble: 1 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 GATA6… acinar strom… 26 18 175 0.164 0.164 ns 94.9
## # ℹ 3 more variables: groups <named list>, xmin <dbl>, xmax <dbl>
# plot
box_GATA6 <- GATA6_Rsummary %>%
ggplot(aes(x = invasion_type, y = GATA6pos_percent))+
geom_boxplot(width = 0.32, outlier.shape = NA)+
geom_jitter(aes(color = Mouse_id), alpha = 1, position=position_jitter(0.15), size = 2.8, seed = 1.5)+
scale_color_brewer(palette = 'Dark2')+
labs(x = '', y = 'GATA6 positive cells (%)')+
theme_bw()+
theme(legend.position = "right", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
axis.ticks = element_blank())+
stat_pvalue_manual(res.wilGATA6, label = 'p.adj.signif',
y.position = 90)+
ylim(0,100)
box_GATA6
Merging marker expression depending on the ROI identity
#HMGA2
HMGA2_m <- HMGA2_Rsummary
HMGA2_m$ROI_ID <- gsub(".*_invasion_","",HMGA2_m$Parent)
HMGA2_m$ROI_ID <- gsub("\\)","",HMGA2_m$ROI_ID)
unique(HMGA2_m$ROI_ID)
## [1] "1" "3" "2" "4" "5" "1_A" "2_A" "10" "11" "12" "13" "14"
## [13] "15" "16" "17" "18" "6" "7" "8" "9"
HMGA2_m$Image <- HMGA2_m$Image_id
HMGA2_m$Image <- gsub(".*_.*_", "", HMGA2_m$Image)
HMGA2_m$Group <- factor(nrow(HMGA2_m))
HMGA2_m$Group <- paste(HMGA2_m$Mouse_id, HMGA2_m$invasion_type, HMGA2_m$Image, HMGA2_m$ROI_ID, sep = "_")
#Gal4
Gal4_m <- Gal4_Rsummary
Gal4_m$ROI_ID <- gsub(".*_invasion_","",Gal4_m$Parent)
Gal4_m$ROI_ID <- gsub("\\)","",Gal4_m$ROI_ID)
unique(Gal4_m$ROI_ID)
## [1] "1" "2" "3" "4" "5" "10" "11" "12" "13" "14" "15" "16"
## [13] "17" "18" "6" "7" "8" "9" "1_A" "2_A"
Gal4_m$Image <- Gal4_m$Image_id
Gal4_m$Image <- gsub(".*_.*_", "", Gal4_m$Image)
Gal4_m$Group <- factor(nrow(Gal4_m))
Gal4_m$Group <- paste(Gal4_m$Mouse_id, Gal4_m$invasion_type, Gal4_m$Image, Gal4_m$ROI_ID, sep = "_")
#align the rois that were set on the images LobuleC in Gal4 quant, but LobuleB in HMGA2 quant:
#28_stromal_LobuleB_1_A = 28_stromal_LobuleC_1_A and #28_stromal_LobuleB_2_A = 28_stromal_LobuleC_2_A
#the group column of the gal4 dataset will be adjusted to that.
Gal4_m <- Gal4_m %>% mutate(Group = ifelse(grepl("28_stromal_LobuleC_1_A", .$Group), '28_stromal_LobuleB_1_A', .$Group))
Gal4_m <- Gal4_m %>% mutate(Group = ifelse(grepl("28_stromal_LobuleC_2_A", .$Group), '28_stromal_LobuleB_2_A', .$Group))
Gal4_m <- Gal4_m[, -c(1, 4:6, 8, 9)]
HMGA2_m <- HMGA2_m[, -c(1, 4:6, 8, 9)]
mHMGA2_Gal4 <- merge(x = Gal4_m, y = HMGA2_m, id = c('Mouse_id', 'invasion_type', 'Group'), all = TRUE)
mheatmat <- mHMGA2_Gal4
rownames(mheatmat) <- mheatmat$Group
meta_mheatmap <- mheatmat #for metadata
meta_mheatmap <- meta_mheatmap[, -c(3:5)]
meta_mheatmap <- meta_mheatmap[-c(27, 28),]
meta_mheatmap$Mouse_id <- paste("m", meta_mheatmap$Mouse_id, sep = "")
mheatmat <- mheatmat[, -c(1:3)]
mheatmat <- mheatmat[-c(27, 28), ]
meta_colors <- list(Mouse_id=c(m27="#fbb4ae", m28="#b3cde3", m29="#ccebc5", m30="#decbe4"),
invasion_type=c(acinar = "#66c2a5", stromal = "#fc8d62"))
pheatmap(mheatmat, fontsize_col = 12, annotation_row = meta_mheatmap, annotation_colors = meta_colors, show_colnames = TRUE, show_rownames = FALSE, main = "Heatmap of four mice of the injection model, values = % positive cells per region of interest", cluster_cols = FALSE, cluster_rows = TRUE)
## Warning: The input is a data frame, convert it to the matrix.
pcamat <- mHMGA2_Gal4[, -3]
pcamat <- pcamat[-c(27, 28), ]
PCA_all <- autoplot(prcomp(pcamat[3:4]), data = pcamat, colour = 'invasion_type', shape = 'invasion_type', size = 5, label = TRUE, repel = TRUE, label.repel = TRUE, main = "PCA plot of injection model, n = 4 mice. 1 dot = 1 ROI's dimension reduction of two stains")
PCA_all
PCA_all_nolabel <- autoplot(prcomp(pcamat[3:4]), data = pcamat, colour = 'invasion_type', shape = 'invasion_type', size = 5, label = FALSE, repel = TRUE, main = "PCA plot of injection model, n = 4 mice. 1 dot = 1 ROI's dimension reduction of two stains")
PCA_all_nolabel
PCA_all_id <- autoplot(prcomp(pcamat[3:4]), data = pcamat, colour = 'Mouse_id', size = 5, label = FALSE, repel = TRUE, main = "PCA plot of injection model, n = 4 mice, color on mouse ID. 1 dot = 1 ROI's dimension reduction of two stains")
PCA_all_id
PCA_all_loadings <- autoplot(prcomp(pcamat[3:4]), loadings = TRUE, loadings.label = TRUE, loadings.label.repel = TRUE, loadings.label.size = 5, data = pcamat, colour = 'invasion_type', shape = 'invasion_type', size = 3, label = FALSE, repel = TRUE, main = "PCA plot of injection model, n = 4 mice with loadings. 1 dot = 1 ROI's dimension reduction of two stains")
PCA_all_loadings
summary(prcomp(pcamat[3:4]))
## Importance of components:
## PC1 PC2
## Standard deviation 39.3145 22.0744
## Proportion of Variance 0.7603 0.2397
## Cumulative Proportion 0.7603 1.0000
fviz_screeplot(prcomp(pcamat[3:4]), addlabels = TRUE, main = "Proportion of variance / Eigenvalues of the PC:s, injection model, n = 4 mice")
# Add in the fviz_contrib
fviz_contrib(prcomp(pcamat[3:4]), choice = "var", axes = 1, title = "Ratio of stains contributing to PC1, injection model, n = 4 mice")
fviz_contrib(prcomp(pcamat[3:4]), choice = "var", axes = 2, title = "Ratio of stains contributing to PC2, injection model, n = 4 mice")
mall_cor <- mHMGA2_Gal4
rownames(mall_cor) <- mall_cor$Group
mall_cor <- mall_cor[, -c(1:3)]
mall_cor <- mall_cor[ -c(27:28),]
cor_all <- cor(mall_cor)
testRes_all= cor.mtest(mall_cor, conf.level = 0.95)
cor_p_all<- testRes_all$p
cor_p_all
## Gal4pos_percent HMGApos_percent
## Gal4pos_percent 0.000000000 0.002197816
## HMGApos_percent 0.002197816 0.000000000
col_fun<- circlize::colorRamp2(c(-1, 0, 1), c("#008837", "#f7f7f7", "#7b3294"))
cell_fun = function(j, i, x, y, w, h, fill){
if(as.numeric(x) <= 1 - as.numeric(y) + 1e-6) {
grid.rect(x, y, w, h, gp = gpar(fill = fill, col = fill))
}
if (cor_p_all[i, j] < 0.01 & as.numeric(x) <= 1 - as.numeric(y) + 1e-6){
grid.text(paste0(sprintf("%.2f", cor_all[i, j]),"**"), x, y, gp = gpar(fontsize = 10))
} else if (cor_p_all[i, j] <= 0.05 & as.numeric(x) <= 1 - as.numeric(y) + 1e-6){
grid.text(paste0(sprintf("%.2f", cor_all[i, j]),"*"), x, y, gp = gpar(fontsize = 10))
}
}
correlation_all <- ComplexHeatmap::Heatmap(cor_all,
rect_gp = gpar(type = "none"),
column_dend_side = "bottom",
column_title = "Correlation of stains of the injection model. Unmatched ROIs are excluded",
column_title_gp = gpar(fontsize = 12, fontface = "bold"),
name = "Spearman's rank correlation coefficient", col = col_fun,
cell_fun = cell_fun,
cluster_rows = TRUE, cluster_columns = TRUE,
row_names_side = "left")
lgd_list = list(
Legend( labels = c("<0.01", "<0.05"), title = "p-value",
graphics = list(
function(x, y, w, h) grid.text("**", x = x, y = y,
gp = gpar(fill = "black")),
function(x, y, w, h) grid.text("*", x = x, y = y,
gp = gpar(fill = "black")))
))
correlation_all
ggplot(mHMGA2_Gal4, aes(x = Gal4pos_percent, y = HMGApos_percent, colour = Mouse_id)) +
geom_point()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(mHMGA2_Gal4, aes(x = Gal4pos_percent, y = HMGApos_percent, colour = invasion_type)) +
geom_point() +
geom_smooth(method = "loess") +
facet_grid(invasion_type ~ .) +
labs(title = "Line: loess smoothing and Spearman correlation coefficient") +
stat_cor(p.accuracy = 0.005, r.accuracy = 0.01, method = "spearman") +
theme_clean()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(mHMGA2_Gal4, aes(x = Gal4pos_percent, y = HMGApos_percent)) +
geom_point() +
geom_smooth(method = "loess")+
stat_cor(p.accuracy = 0.005, r.accuracy = 0.01, method = "spearman") +
labs(title = "Line: Loess smoothing and Spearman correlation coefficient") +
theme_clean()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
cor(mHMGA2_Gal4$Gal4pos_percent, mHMGA2_Gal4$HMGApos_percent, use = "pairwise.complete.obs")
## [1] -0.4496448
# lm fit
ggplot(mHMGA2_Gal4, aes(x = Gal4pos_percent, y = HMGApos_percent, colour = invasion_type)) +
geom_point() +
geom_smooth(method = "lm") +
facet_grid(invasion_type ~ .) +
labs(title = "Line: lm and Spearman correlation coefficient") +
stat_cor(p.accuracy = 0.005, r.accuracy = 0.01, method = "spearman") +
theme_clean()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(mHMGA2_Gal4, aes(x = Gal4pos_percent, y = HMGApos_percent)) +
geom_point() +
geom_smooth(method = "lm")+
stat_cor(p.accuracy = 0.005, r.accuracy = 0.01, method = "spearman") +
labs(title = "Line: lm and Spearman correlation coefficient") +
theme_clean()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
sm_HMGA2_GATA6 <- split(mHMGA2_Gal4, f = mHMGA2_Gal4$invasion_type)
Gal4mean <- lapply(sm_HMGA2_GATA6, function (a) {
mean(a$Gal4pos_percent, na.rm = TRUE)
}
)
HMGA2mean <- lapply(sm_HMGA2_GATA6, function (a) {
mean(a$HMGApos_percent, na.rm = TRUE)
}
)
Gal4mean
## $acinar
## [1] 54.57485
##
## $stromal
## [1] 41.23989
HMGA2mean
## $acinar
## [1] 14.3187
##
## $stromal
## [1] 32.90322
Gal4median <- lapply(sm_HMGA2_GATA6, function (a) {
median(a$Gal4pos_percent, na.rm = TRUE)
}
)
HMGA2median <- lapply(sm_HMGA2_GATA6, function (a) {
median(a$HMGApos_percent, na.rm = TRUE)
}
)
Gal4median
## $acinar
## [1] 62.39959
##
## $stromal
## [1] 26.84349
HMGA2median
## $acinar
## [1] 4.739336
##
## $stromal
## [1] 32.08191
Boxplots, paired on the roi id
order_HGal_melt_og <- mHMGA2_Gal4
colnames(order_HGal_melt_og) [4] <- "Gal4"
colnames(order_HGal_melt_og) [5] <- "HMGA2"
#Two rois could not be matched overlappingly, exclude those:
order_HGal_melt_og <- order_HGal_melt_og[-c(27, 28),]
order_HGal_melt <- melt(order_HGal_melt_og, id.vars = c("Mouse_id", "invasion_type", "Group"), value.name = "Fraction", variable.name = "Marker")
order_HGal <- order_HGal_melt[order(order_HGal_melt[,3] ),] # Ordering by the third column 'Group' = specific ROI ID
head(order_HGal)
## Mouse_id invasion_type Group Marker Fraction
## 1 27 acinar 27_acinar_LobuleA_1 Gal4 26.1538462
## 45 27 acinar 27_acinar_LobuleA_1 HMGA2 68.8524590
## 2 27 acinar 27_acinar_LobuleA_2 Gal4 0.9708738
## 46 27 acinar 27_acinar_LobuleA_2 HMGA2 66.3636364
## 3 27 acinar 27_acinar_LobuleA_3 Gal4 3.5714286
## 47 27 acinar 27_acinar_LobuleA_3 HMGA2 0.0000000
#stat tests. Comparison: are HMGA2 and Gal4 differentially expressed in (the same) ROI?
wilcox_pair<- order_HGal %>%
wilcox_test(Fraction ~ Marker, paired = TRUE) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj")
wilcox_pair <- wilcox_pair %>% add_xy_position()
wilcox_pair
## # A tibble: 1 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Fraction Gal4 HMGA2 44 44 758 0.0017 0.0017 **
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## # xmax <dbl>
# Comparison: are HMGA2 and Gal4 differentially expressed in (the same) ROI, separated on the tissue location?
wilcox_pair_g <- order_HGal %>%
group_by(invasion_type) %>%
wilcox_test(Fraction ~ Marker, paired = TRUE) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj")
wilcox_pair_g <- wilcox_pair_g %>% add_xy_position()
wilcox_pair_g [11] <- 110
wilcox_pair_g
## # A tibble: 2 × 14
## invasion_type .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 acinar Fraction Gal4 HMGA2 26 26 312 0.000217 0.000434
## 2 stromal Fraction Gal4 HMGA2 18 18 99 0.58 0.58
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## # groups <named list>, xmin <dbl>, xmax <dbl>
# Boxplot
boxes_HGal <- ggplot(order_HGal, aes(x = Marker, y = Fraction)) +
geom_boxplot(aes(color = Marker, fill = Marker), alpha = 0.4) +
geom_line(aes(group = Group), colour = "grey") +
geom_point(size = 1.5, aes(fill = Marker, color = Marker)) +
xlab(NULL) + ylab("Fraction of positive cells/ROI") +
stat_summary(fun.data = function(x) data.frame(y = 95, label = paste("Mean =",round(mean(x), digits = 1))), geom="text") +
labs(title = "Differential expression of HMGA2 and Gal4 in lobules and stroma separately", subtitle = "1 data point = value from 1 ROI") +
theme_clean() +
scale_color_manual(values = c("#984ea3", "#4daf4a")) +
scale_fill_manual(values = c("#984ea3", "#4daf4a"))
boxes_HGal_sign <- boxes_HGal + stat_pvalue_manual(label = "p.adj.signif",
wilcox_pair, tip.length = 0.01)
boxes_HGal_sign
boxes_HGal_g <- ggplot(order_HGal, aes(x = Marker, y = Fraction)) +
geom_boxplot(aes(color = Marker, fill = Marker), alpha = 0.4) +
geom_line(aes(group = Group), colour = "grey") +
geom_point(size = 1.5, aes(fill = Marker, color = Marker)) +
facet_wrap(invasion_type ~.) +
xlab(NULL) + ylab("Fraction of positive cells/ROI") +
stat_summary(fun.data = function(x) data.frame(y = 95, label = paste("Mean =",round(mean(x), digits = 1))), geom="text") +
labs(title = "Differential expression of HMGA2 and Gal4 in lobules and stroma", subtitle = "1 data point = value from 1 ROI") +
scale_color_manual(values = c("#984ea3", "#4daf4a")) +
scale_fill_manual(values = c("#984ea3", "#4daf4a")) +
theme_few()
boxes_HGal_g_sign <- boxes_HGal_g + stat_pvalue_manual(label = "p.adj.signif",
wilcox_pair_g, tip.length = 0.01)
boxes_HGal_g_sign
distrib_Gal4 <-ggplot(order_HGal_melt_og, aes(x=fct_reorder(Group, Gal4), y=Gal4, fill = invasion_type)) +
geom_col(width = 0.7) +
labs(title = "Distribution of Gal4 expression in all ROIs") +
theme_clean() +
labs(x = "Each case + ROI combination", y = "% Gal4+ tumor cells") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
scale_fill_manual(values = c("dodgerblue1", "dodgerblue4")) +
geom_hline(yintercept = 30,linetype = "longdash")
distrib_Gal4
distrib_HMGA2 <-ggplot(order_HGal_melt_og, aes(x=fct_reorder(Group, HMGA2), y=HMGA2, fill = invasion_type)) +
geom_col(width = 0.7) +
labs(title = "Distribution of HMGA2 expression in all ROIs") +
theme_clean() +
labs(x = "Each case + ROI combination", y = "% HMGA2+ tumor cells") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
scale_fill_manual(values = c("tomato1", "tomato4"))+
geom_hline(yintercept = 65,linetype = "longdash") +
geom_hline(yintercept = 20,linetype = "longdash")
distrib_HMGA2
#Cutoffs to work with For HMGA2: High = >50% Medium: 20-50% Low:
<20%
For Gal4: High: =>30% Low: <30%
Assign an overall subtype for each ROI, depending on the expression pattern och Galectin-4 and HMGA2
subtypes <- order_HGal_melt_og
subtypes$Subtype <- factor(nrow(subtypes))
subtypes$Weight <- factor(nrow(subtypes))
subtypes$Subtype_group <- factor(nrow(subtypes))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 < 20 & Gal4 < 30, 'Hybrid', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 < 20 & Gal4 < 30, '1', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 < 20 & Gal4 < 30, 'Weak_hybrid', .$Subtype_group))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 < 20 & Gal4 > 30, 'Classical', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 < 20 & Gal4 > 30, '2', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 < 20 & Gal4 > 30, 'Strong_classical', .$Subtype_group))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 < 30, 'Basal', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 < 30, '1', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 < 30, 'Weak_basal', .$Subtype_group))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 > 30, 'Classical', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 > 30, '1', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 < 50 & HMGA2 > 20 & Gal4 > 30, 'Weak_classical', .$Subtype_group))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 > 50 & Gal4 > 30, 'Hybrid', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 > 50 & Gal4 > 30, '2', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 > 50 & Gal4 > 30, 'Strong_hybrid', .$Subtype_group))
subtypes <- subtypes %>% mutate(Subtype = ifelse(HMGA2 > 50 & Gal4 < 30, 'Basal', .$Subtype))
subtypes <- subtypes %>% mutate(Weight = ifelse(HMGA2 > 50 & Gal4 < 30, '2', .$Weight))
subtypes <- subtypes %>% mutate(Subtype_group = ifelse(HMGA2 > 50 & Gal4 < 30, 'Strong_basal', .$Subtype_group))
unique(subtypes$Subtype_group)
## [1] "Strong_basal" "Weak_hybrid" "Strong_classical" "Weak_classical"
## [5] "Weak_basal"
# Interestingly, no regions are "strong hybrids", possibly indicating that an intermediate state when switching occurs constitutes lower % positive for the respective subtype markers.
#new metadata matrix with the new subtype info
meta_mheatmap2 <- subtypes
rownames(meta_mheatmap2) <- meta_mheatmap2$Group
meta_mheatmap2$Mouse_id <- paste("m", meta_mheatmap2$Mouse_id, sep = "")
meta_mheatmap2 <- meta_mheatmap2[, -c(3:5, 7)]
#add colors for subtype metadata
meta_colors2 <- list(Mouse_id=c(m27="#fbb4ae", m28="#b3cde3", m29="#ccebc5", m30="#decbe4"),
invasion_type=c(acinar = "#66c2a5", stromal = "#fc8d62"),
Subtype = c(Classical = "#2b83ba", Basal = "#d7191c", Hybrid = "#ffffbf"),
Subtype_group = c(Strong_classical="#3288bd", Weak_classical = "#abdda4",Weak_hybrid="#ffffbf", Strong_basal="#d53e4f", Weak_basal="#fdae61" ))
heatmap_inj <- pheatmap(mheatmat, fontsize_col = 12, annotation_row = meta_mheatmap2, annotation_colors = meta_colors2, show_colnames = TRUE, show_rownames = FALSE, main = "Heatmap of four mice of the injection model, values = % positive cells per region of interest", cluster_cols = FALSE, cluster_rows = TRUE)
## Warning: The input is a data frame, convert it to the matrix.
heatmap_inj
Variable 1 = Subtype (classical, basal, hybrid) in columns Variable 2 = Tissue location (lobular or stromal) in rows
H0 no difference between lobular and stromal ROIs in which subtype they are (Row and column variables are independent) -> degrees of freedom = (3-1)*(2-1) = 2
#dcast 'subtypes' data frame considering the columns invasion_type and Subtype
subtypes_subset <- subtypes[, c(2, 6)]
ct <- dcast(subtypes_subset, invasion_type ~ Subtype, fun.aggregate = length)
## Using Subtype as value column: use value.var to override.
rownames(ct) <- ct$invasion_type
ct <- ct[, -1]
ct
## Basal Classical Hybrid
## acinar 3 17 6
## stromal 9 7 2
ct_t <- as.table(as.matrix(ct))
ct_balloon <- ggballoonplot(ct, fill = "value", size.range = c(2, 20)) +
scale_fill_gradient(high = "#252525", low = "#d9d9d9") +
theme_minimal()
ct_balloon
chi_ct <- chisq.test(ct_t)
## Warning in chisq.test(ct_t): Chi-squared approximation may be incorrect
chi_ct
##
## Pearson's Chi-squared test
##
## data: ct_t
## X-squared = 7.9758, df = 2, p-value = 0.01854
#Expected counts - to be compared with the ct_t print above which are the observed counts
chi_ct$expected
## Basal Classical Hybrid
## acinar 7.090909 14.181818 4.727273
## stromal 4.909091 9.818182 3.272727
#Pearson residuals: the contributions of each individual cell to the Chi^2 statistic
chi_ct$residuals
## Basal Classical Hybrid
## acinar -1.5362747 0.7483471 0.5853694
## stromal 1.8463724 -0.8994012 -0.7035265
#Pearson residuals visualised
corrplot(chi_ct$residuals, is.cor = FALSE)
#degrees of freedom. If na: Monte Carlo simuation is used instead.
chi_ct$parameter
## df
## 2
With frequencies below 5, a Fishers test may be more appropriate:
fish_ct <- fisher.test(ct_t)
fish_ct
##
## Fisher's Exact Test for Count Data
##
## data: ct_t
## p-value = 0.02738
## alternative hypothesis: two.sided
# alternative, from rstatix - they show the same :
fisher_test(ct_t, detailed = TRUE)
## # A tibble: 1 × 5
## n p method alternative p.signif
## * <int> <dbl> <chr> <chr> <chr>
## 1 44 0.0274 Fisher's Exact test two.sided *
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS 15.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Stockholm
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] corrplot_0.95 reshape2_1.4.4 ggthemes_5.1.0
## [4] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [7] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1
## [10] tibble_3.2.1 tidyverse_2.0.0 ComplexHeatmap_2.22.0
## [13] factoextra_1.0.7 ggfortify_0.4.17 pheatmap_1.0.12
## [16] RColorBrewer_1.1-3 ggpubr_0.6.0 ggplot2_3.5.2
## [19] rstatix_0.7.2 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0
## [4] digest_0.6.37 timechange_0.3.0 lifecycle_1.0.4
## [7] cluster_2.1.8.1 Cairo_1.6-2 magrittr_2.0.3
## [10] compiler_4.4.1 rlang_1.1.6 sass_0.4.10
## [13] tools_4.4.1 utf8_1.2.5 yaml_2.3.10
## [16] knitr_1.50 ggsignif_0.6.4 labeling_0.4.3
## [19] plyr_1.8.9 abind_1.4-8 withr_3.0.2
## [22] BiocGenerics_0.52.0 stats4_4.4.1 colorspace_2.1-1
## [25] scales_1.4.0 iterators_1.0.14 dichromat_2.0-0.1
## [28] cli_3.6.5 rmarkdown_2.29 crayon_1.5.3
## [31] generics_0.1.4 rstudioapi_0.17.1 tzdb_0.5.0
## [34] rjson_0.2.23 cachem_1.1.0 splines_4.4.1
## [37] parallel_4.4.1 matrixStats_1.5.0 vctrs_0.6.5
## [40] Matrix_1.7-3 jsonlite_2.0.0 carData_3.0-5
## [43] car_3.1-3 IRanges_2.40.1 GetoptLong_1.0.5
## [46] hms_1.1.3 S4Vectors_0.44.0 ggrepel_0.9.6
## [49] Formula_1.2-5 clue_0.3-66 foreach_1.5.2
## [52] jquerylib_0.1.4 glue_1.8.0 codetools_0.2-20
## [55] stringi_1.8.7 gtable_0.3.6 shape_1.4.6.1
## [58] pillar_1.10.2 htmltools_0.5.8.1 circlize_0.4.16
## [61] R6_2.6.1 doParallel_1.0.17 lattice_0.22-7
## [64] evaluate_1.0.3 png_0.1-8 backports_1.5.0
## [67] broom_1.0.8 bslib_0.9.0 Rcpp_1.0.14
## [70] nlme_3.1-168 gridExtra_2.3 mgcv_1.9-3
## [73] xfun_0.52 pkgconfig_2.0.3 GlobalOptions_0.1.2